# -*- coding: utf-8 -*-

import re
import time
import urllib

try:
    # Python 2
    from urllib import quote_plus
except ImportError:
    # Python 3
    from urllib.parse import quote_plus

from xbmcup.net import HTTP
from xbmcup.cache import Cache
from xbmcup.html import Clear

import xbmc


# Genre key -> numeric genre id; KinoPoisk.best() puts the id into the
# "m_act[genre]" segment of the navigator URL.
GENRE = dict((
    ('anime', 1750),
    ('biography', 22),
    ('action', 3),
    ('western', 13),
    ('military', 19),
    ('detective', 17),
    ('children', 456),
    ('for adults', 20),
    ('documentary', 12),
    ('drama', 8),
    ('game', 27),
    ('history', 23),
    ('comedy', 6),
    ('concert', 1747),
    ('short', 15),
    ('criminal', 16),
    ('romance', 7),
    ('music', 21),
    ('cartoon', 14),
    ('musical', 9),
    ('news', 28),
    ('adventures', 10),
    ('realitytv', 25),
    ('family', 11),
    ('sports', 24),
    ('talk shows', 26),
    ('thriller', 4),
    ('horror', 1),
    ('fiction', 2),
    ('filmnoir', 18),
    ('fantasy', 5),
))


class KinoPoisk:
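    """
    Scraper for movie and person data from kinopoisk.ru.

    API:
        scraper  - find a movie by name (and year) and return its profile
        movie    - movie profile
        search   - movie search
        best     - search for the best movies
        person   - person search
        work     - information about a person's works
        review   - reviews of a movie and their statistics

    """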
    
    def __init__(self):
        """Create the cache, the HTML cleaner, the HTTP client and the request headers."""
        self.cache = Cache('kinopoisk.db')
        self.html = Clear()
        self.http = HTTP()

        # Headers passed to every self.http.fetch() call made by this class.
        self.headers = dict((
            ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
            ('Cache-Control', 'no-cache'),
            ('Referer', 'http://www.kinopoisk.ru/level/7/'),
        ))
        
        
    # API
    
    def scraper(self, name, year=None, trailer_quality=None):
        """
        Find a movie by name (optionally by year) and return its profile.

        The movie id is obtained through the cache (filled by self._scraper)
        under a tag built from the URL-quoted name and, if given, the year.

        :param name: movie name; it must be encodable as windows-1251
        :param year: optional year, added to the cache tag and passed to the search
        :param trailer_quality: passed through to self.movie()
        :return: the result of self.movie() for the found id, or None when the
                 name cannot be encoded or no id was found
        """
        try:
            tag = 'scraper:' + quote_plus(name.encode('windows-1251'))
        except (UnicodeError, AttributeError):
            # Best effort: a name that is not text, or that cannot be
            # represented in windows-1251, cannot be looked up.
            return None

        if year:
            tag += ':' + str(year)

        movie_id = self.cache.get(tag, self._scraper, name, year)
        if not movie_id:
            return None

        return self.movie(movie_id, trailer_quality)
    
    
    def movie(self, id, trailer_quality=None):
        """
        Return the profile of a movie with trailers filtered by quality.

        The raw profile is taken from the cache (filled by self._movie). For
        every trailer only one video is kept: the last entry of its video
        list whose quality does not exceed trailer_quality. Trailers without
        such a video are dropped. If any trailers remain, info['trailer'] is
        set to the URL of the first one flagged as a trailer, or of the first
        remaining video otherwise.

        :param id: movie id (anything convertible with str())
        :param trailer_quality: maximum accepted quality; None means 6, the
                                highest value self._movie produces (3 + 3 for HD)
        :return: movie dict, or None when the profile is not available
        """
        movie_id = str(id)

        if trailer_quality is None:
            trailer_quality = 6

        cached = self.cache.get('movie:' + movie_id, self._movie, movie_id)
        if not cached:
            return None

        # Never modify the object handed out by the cache: if the cache
        # returns the same object again, an in-place edit would replace the
        # video lists by single tuples and break the next call.
        movie = dict(cached)

        if cached['trailers']:
            # keep only the videos of the requested quality
            trailers = []
            for item in cached['trailers']:
                suitable = [v for v in item['video'] if v[0] <= trailer_quality]
                if suitable:
                    trailers.append(dict(item, video=suitable[-1]))

            movie['trailers'] = trailers

            if trailers:
                # choose the main trailer; if none is flagged as a trailer,
                # fall back to the first available video
                flagged = [t for t in trailers if t['trailer']]
                chosen = flagged[0] if flagged else trailers[0]

                info = dict(cached['info'])
                info['trailer'] = chosen['video'][1]
                movie['info'] = info

        return movie
    
    
    def search(self, name, trailer_quality=None):
        """
        Search movies by name.

        :param name: movie name to look for
        :param trailer_quality: accepted but not used by this method
        :return: whatever self._search_movie(name) returns
        """
        found = self._search_movie(name)
        return found
    
    
    def best(self, **kwarg):
        """
        Search for the best movies through the site navigator.

        Recognized keyword arguments:
            page   - page number to request (default 1)
            limit  - movies per page (default 50)
            votes  - value for the "num_vote" filter (default 100)
            dvd    - if true, adds the "is_dvd" filter
            decade - value for the "decade" filter
            genre  - key of GENRE (an unknown key raises KeyError)
            rate   - value for the "rating" filter
            mpaa   - value for the "mpaa" filter

        :return: None on a fetch error, otherwise a dict with
                 'pages': (total pages, previous page or 0, current page,
                 next page or 0) and 'data': list of movie ids (int)
        """
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = 'http://www.kinopoisk.ru/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/'

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U|re.S).search(response.body.decode('windows-1251'))
        if r:
            body = r.group(1)

            # Build the page list from the "<first>&mdash;<last> ... <total>" counter.
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)', re.U).search(body)
            if p:
                # Floor division keeps the page numbers integers even where
                # "/" is true division (Python 3).
                page = (int(p.group(1)) - 1) // limit + 1
                total = int(p.group(2))
                pages, rest = divmod(total, limit)
                if rest:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page, 0 if page == pages else page + 1)

            res['data'] = [int(movie_id) for movie_id in re.compile('<div id="tr_([0-9]+)"', re.U|re.S).findall(body)]

        return res
    
    
    def person(self, name):
        """
        Search persons by name.

        :param name: person name; it is encoded as windows-1251 for the URL
        :return: None on a fetch error, otherwise a dict with
                 'pages': (1, 0, 1, 0) and 'data': list of dicts with the keys
                 'id' (int), 'name', 'originalname' (or None), 'year' (int or
                 None) and 'poster' (URL, or None when the result block
                 shows the "no-poster.gif" placeholder)
        """
        url = 'http://www.kinopoisk.ru/s/type/people/list/1/find/' + quote_plus(name.encode('windows-1251')) + '/order/relevant/'
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        body = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(response.body.decode('windows-1251'))
        if body:
            # The patterns do not depend on the block, so compile them once.
            block_re = re.compile('<p class="pic">(.+?)<div class="clear">', re.U|re.S)
            name_re = re.compile(r'<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U|re.S)
            original_re = re.compile('<span class="gray">([^<]+)</span>', re.U|re.S)
            year_re = re.compile('<span class="year">([0-9]{4})</span>', re.U|re.S)

            for block in block_re.findall(body.group(1)):
                found = name_re.search(block)
                if not found:
                    continue

                person_id = found.group(1)
                person_name = found.group(2).strip()
                if not person_name:
                    continue

                original = None
                found = original_re.search(block)
                if found:
                    # an empty original name is reported as None
                    original = found.group(1).strip() or None

                year = None
                found = year_re.search(block)
                if found:
                    year = int(found.group(1))

                poster = None
                if block.find('no-poster.gif') == -1:
                    poster = 'http://st.kinopoisk.ru/images/actor/' + person_id + '.jpg'

                res.append({'id': int(person_id), 'name': person_name, 'originalname': original, 'year': year, 'poster': poster})

        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def work(self, id):
        """
        Collect the movies a person worked on, grouped by kind of work.

        :param id: person id, used to build the /name/<id>/ page URL
        :return: None on a fetch error, otherwise a dict mapping a kind of
                 work ('actor', 'director', ...; 'producer_ussr' is merged
                 into 'producer') to a list of movie ids (int). Titles marked
                 as a series or mini-series are skipped.
        """
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers)
        if response.error:
            return None

        filmography = {}

        section = re.compile('id="sort_block">(.+?)<div class="go_up"', re.U|re.S).search(response.body.decode('windows-1251'))
        if not section:
            return filmography

        kinds = ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director')
        serial_marks = (u'(мини-сериал)', u'(сериал)')
        movie_re = re.compile('<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U)

        for chunk in section.group(1).split(u'<table cellspacing=0 cellpadding=0 border=0 width=100%>'):
            # the first kind whose id="..." marker occurs in the chunk wins
            kind = None
            for candidate in kinds:
                if (u'id="' + candidate + u'"') in chunk:
                    kind = candidate
                    break

            if not kind:
                continue
            if kind == 'producer_ussr':
                kind = 'producer'

            movies = []
            for film_id, title in movie_re.findall(chunk):
                if not any(mark in title for mark in serial_marks):
                    movies.append(int(film_id))

            if movies:
                filmography.setdefault(kind, []).extend(movies)

        return filmography
    
    
    def review(self, id, query):
        """
        Return the reviews of a movie, or their statistics.

        :param id: movie id
        :param query: 'stat' for the statistics, otherwise the key of the
                      review list to return (the same value is passed to
                      self._review as its query)
        :return: data[query] of the cached self._review result; if that
                 result is empty, the empty value itself is returned
        """
        # The statistics are stored together with the 'all' review list.
        if query == 'stat':
            cache_query = 'all'
        else:
            cache_query = query

        tag = 'review:' + str(id) + ':' + cache_query
        data = self.cache.get(tag, self._review, id, cache_query)
        return data[query] if data else data
        
    
    # PRIVATE
    
    
    def _search_movie(self, name, year=None):
        """
        Search movies by name on the site.

        :param name: movie name; it is encoded as windows-1251 for the URL
        :param year: optional year, added to the URL as the "year" filter
        :return: None on a fetch error, otherwise a dict with
                 'pages': (1, 0, 1, 0) and 'data': list of movie ids (int)
                 in the order they appear on the result page
        """
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + quote_plus(name.encode('windows-1251')) + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(response.body.decode('windows-1251'))
        if r:
            found = re.compile('<p class="name"><a href="/level/1/film/([0-9]+)', re.U|re.S).findall(r.group(1))
            res = [int(movie_id) for movie_id in found]

        return {'pages': (1, 0, 1, 0), 'data': res}
    
        
    def _scraper(self, name, year):
        """
        Cache filler for scraper(): find the id of the best matching movie.

        :param name: movie name
        :param year: year or None
        :return: (timeout, id) tuple for the cache: (False, None) when the
                 search failed, (259200, None) when nothing was found, else
                 the first found id with a timeout of one week for a movie
                 of the current or a later year and True otherwise
        """
        week = 7*24*60*60

        # A fresh movie is cached for a short time only (the site may still
        # be updated).
        is_fresh = year and year >= time.gmtime(time.time()).tm_year
        timeout = week if is_fresh else True

        found = self._search_movie(name, year)

        if found is None:
            return False, None

        if not found['data']:
            # keep the empty result for three days
            return 259200, None

        return timeout, found['data'][0]
    
    
    def _review(self, id, query):
        """
        Cache filler for review(): download and parse the reviews of a movie.

        :param id: movie id
        :param query: 'good', 'bad' or 'neutral' to request only reviews with
                      that status; any other value requests the page
                      without a status filter
        :return: (timeout, data) tuple for the cache: (False, None) on a
                 fetch error, else (3600, data) where data has the keys
                 'stat' (counters 'all', 'good', 'bad', 'neutral') and
                 <query> (list of review dicts with the keys 'nick',
                 'count', 'title', 'review', 'time'). Only reviews that have
                 both a nick and a text are included.
        """
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {'all': 0, 'good': 0, 'bad': 0, 'neutral': 0},
            query: []
        }
        stat = res['stat']

        r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U|re.S).search(html)
        if r:
            ul = r.group(1)

            for css, key in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                found = re.compile('<li class="' + css + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul)
                if found:
                    stat[key] = int(found.group(1))

            stat['all'] = stat['good'] + stat['bad'] + stat['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(html)
        if r:
            # The patterns are the same for every review, so compile them once.
            body_re = re.compile('itemprop="reviewBody">(.+?)</div>', re.U|re.S)
            nick_link_re = re.compile('<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>')
            nick_plain_re = re.compile('<p class="profile_name"><s></s>([^<]+)</p>')
            title_re = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>')
            date_re = re.compile('<span class="date">([^<]+)</span>', re.U|re.S)
            # Backslashes are doubled because this literal is not a raw string.
            count_re = re.compile(u'<a href="[^>]+>рецензии \\(([0-9]+)\\)</a>', re.U|re.S)

            # site markup -> replacement applied before the text is cleaned
            markup = ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]'))

            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                found = body_re.search(block)
                if found:
                    text = found.group(1)
                    for old, new in markup:
                        text = text.replace(old, new)

                    text = self.html.text(text)
                    if text:
                        review['review'] = text

                # the nick is either a link to the profile or plain text
                found = nick_link_re.search(block) or nick_plain_re.search(block)
                if found:
                    nick = self.html.string(found.group(1))
                    if nick:
                        review['nick'] = nick

                found = title_re.search(block)
                if found:
                    title = self.html.string(found.group(1))
                    if title:
                        review['title'] = title

                found = date_re.search(block)
                if found:
                    review['time'] = found.group(1).replace(u' |', u',')

                found = count_re.search(block)
                if found:
                    review['count'] = int(found.group(1))

                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res # one hour
            
    
    def _movie(self, id):
        """
        Cache filler for movie(): download and parse the profile of a movie.

        Besides the movie page itself, the posters, studio and video pages
        are fetched when the movie page menu links to them.

        :param id: movie id (converted with str() to build the URLs)
        :return: (timeout, data) tuple for the cache: (False, None) on a
                 fetch error of the movie page, else data with the keys
                 'id' (int), 'thumb' (poster URL or None), 'trailers' (list
                 of dicts with 'name', 'time', 'trailer', 'ru' and 'video',
                 a list of (quality, url, size text) tuples) and 'info'
                 (parsed fields). The timeout is one week when the year is
                 unknown or not earlier than the current year, else True.
        """
        film_id = str(id)
        film_url = 'http://www.kinopoisk.ru/film/' + film_id + '/'

        response = self.http.fetch(film_url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'id': int(film_id),
            'thumb': None,
            'trailers': [],
            'info': {}
        }
        info = res['info']

        # title, original title, tagline, MPAA rating, year, top250 position
        # runtime - film duration (kept as a separate field, otherwise the
        # file size is not visible)
        # NOTE: the top250 pattern must be a unicode literal; as a byte
        # string it can never match the decoded page on Python 2.
        for tag, reg, kind in (
            ('title', '<title>(.+?)</title>', 'str'),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline', r'<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', r'itemprop="contentRating"\s+content="MPAA\s+([^"]+)"', 'str'),
            ('runtime', r'<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'),
            ('year', '<a href="/level/10/m_act%5Byear%5D/([0-9]+)/"', 'int'),
            ('top250', u'Топ250\\: <a\\shref="/level/20/#([0-9]+)', 'int')
            ):
            r = re.compile(reg, re.U).search(html)
            if not r:
                continue
            value = r.group(1).strip()
            if not value:
                continue
            if kind == 'int':
                info[tag] = int(value)
            else:
                info[tag] = self.html.string(value)

        # directors, writers, genres
        link_text_re = re.compile('<a [^>]+>([^<]+)</a>', re.U)
        for tag, reg in (
            ('director', u'<td class="type">режиссер</td><td[^>]*>(.+?)</td>'),
            ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
            ('genre', u'<td class="type">жанр</td><td[^>]*>(.+?)</td>')
            ):
            r = re.compile(reg, re.U).search(html)
            if r:
                names = []
                for raw in link_text_re.findall(r.group(1)):
                    name = self.html.string(raw)
                    if name and name != '...':
                        names.append(name)
                if names:
                    info[tag] = u', '.join(names)

        # actors
        r = re.compile(u'<span class="title">В главных ролях:</span><div>(.+?)</div>', re.U|re.S).search(html)
        if r:
            actors = []
            for raw in re.compile('<span itemprop="actors"><a [^>]+>([^<]+)</a></span>', re.U).findall(r.group(1)):
                name = self.html.string(raw)
                if name and name != '...':
                    actors.append(name)
            if actors:
                info['cast'] = actors

        # plot
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                info['plot'] = plot

        # IMDb rating and votes
        r = re.compile(r'IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            info['rating'] = float(r.group(1).strip())
            info['votes'] = r.group(2).strip()

        # world premiere; the day is any one- or two-digit number
        r = re.compile(u'премьера \\(мир\\)</td>.+?>([0-9]{1,2}) ([^0-9]+) ([0-9]{4})</a>', re.U|re.S).search(html)
        if r:
            months = (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря')
            if r.group(2) in months:
                month = str(months.index(r.group(2)) + 1).zfill(2)
                day = r.group(1).zfill(2)
                info['premiered'] = '-'.join([r.group(3), month, day])

        menu = re.compile('<ul class="film_menu">(.+?)</ul>', re.U|re.S).search(html)
        if menu:
            menu = menu.group(1)

            # posters: the image of the last picture listed on the posters page
            if menu.find('/film/' + film_id + '/posters/') != -1:
                response = self.http.fetch(film_url + 'posters/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    posters = re.compile('<a href="/picture/([0-9]+)/" target="_blank"', re.U).findall(html)
                    if posters:
                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + posters[-1] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('<img[^>]+ id="image" src="([^"]+)"[^>]+>').search(html)
                            if r:
                                res['thumb'] = 'http://st.kinopoisk.ru' + r.group(1)

            # studios
            if menu.find('/film/' + film_id + '/studio/') != -1:
                response = self.http.fetch(film_url + 'studio/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
                    if r:
                        studio = []
                        for raw in re.compile('<a href="/level/10/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
                            name = self.html.string(raw)
                            if name:
                                studio.append(name)
                        if studio:
                            info['studio'] = u', '.join(studio)

            # trailers

            trailers1 = [] # russian trailers
            trailers2 = [] # other russian videos
            trailers3 = [] # trailers
            trailers4 = [] # other videos

            if menu.find('/film/' + film_id + '/video/') != -1:
                response = self.http.fetch(film_url + 'video/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')

                    name_re = re.compile('<a href="/film/' + film_id + '/video/t/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U)
                    time_re = re.compile(r'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S)
                    video_re = re.compile(r'trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S)

                    for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):

                        # skip blocks without a "watch" link
                        if row.find(u'>СМОТРЕТЬ</a>') == -1:
                            continue

                        # name of the video
                        r = name_re.search(row)
                        if not r:
                            continue
                        name = self.html.string(r.group(1))
                        if not name:
                            continue

                        trailer = {
                            'name': name,
                            'time': None,
                            'trailer': False,
                            # a video is russian when the row shows flag-2.gif
                            'ru': row.find('flags/flag-2.gif') != -1,
                            'video': []
                        }

                        # trailer or teaser?
                        for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
                            if name.find(token) != -1:
                                trailer['trailer'] = True
                                break

                        # duration of the video
                        r = time_re.search(row)
                        if r:
                            trailer['time'] = r.group(1).strip()

                        # one entry per quality; the HD icon raises the quality by 3
                        for found in video_re.findall(row):
                            quality = int(found[0])
                            if found[1].find('icon-hd') != -1:
                                quality += 3

                            trailer['video'].append((quality, found[2].strip(), found[3]))

                        if not trailer['video']:
                            continue

                        if trailer['ru']:
                            if trailer['trailer']:
                                trailers1.append(trailer)
                            else:
                                trailers2.append(trailer)
                        else:
                            if trailer['trailer']:
                                trailers3.append(trailer)
                            else:
                                trailers4.append(trailer)

            # join the trailer groups in order of preference
            res['trailers'].extend(trailers1)
            res['trailers'].extend(trailers2)
            res['trailers'].extend(trailers3)
            res['trailers'].extend(trailers4)

        timeout = True
        # a fresh movie is cached for a short time only (the site may still be updated)
        if 'year' not in info or int(info['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60 #week

        return timeout, res
        
    
